** Data reading and variable selection from raw data
** Hungarian Social Mobility and Life History Survey 1983


** 01. Reading data **

* Session setup: close any log left open by a previous run (ignoring the
* error if none is open), empty memory, and disable output paging.
capture log close
clear all
set more off
* Placeholder: put the working directory after -cd-.
cd /*insert your work directory here*/


#delimit ;
infix
    /* location / household identifiers */
    region 1-5  district 6-8  flat 9-11  hhold 14-15
    /* respondent: sex, birth year, schooling */
    sex 17-17  birthyear 18-19
    element 22-22  typhisc 23-23  college 30-30
    /* father */
    dadeduc 118-118  dadocc14 159-162  dadtyp14 165-165  dadskl14 166-166
    /* mother */
    momeduc 195-195  momocc14 236-239  momtyp14 242-242  momskl14 243-243
    /* occupation spells 1-12: occupation code, skill level, class of worker */
    occ01 266-269  skl01 270-270  clas01 279-279
    occ02 282-285  skl02 286-286  clas02 295-295
    occ03 298-301  skl03 302-302  clas03 311-311
    occ04 314-317  skl04 318-318  clas04 327-327
    occ05 330-333  skl05 334-334  clas05 343-343
    occ06 346-349  skl06 350-350  clas06 359-359
    occ07 362-365  skl07 366-366  clas07 375-375
    occ08 378-381  skl08 382-382  clas08 391-391
    occ09 394-397  skl09 398-398  clas09 407-407
    occ10 410-413  skl10 414-414  clas10 423-423
    occ11 426-429  skl11 430-430  clas11 439-439
    occ12 442-445  skl12 446-446  clas12 455-455
    /* siblings 1-7: sex and two-digit birth year */
    sibsex1 633-633  sibage1 634-635
    sibsex2 643-643  sibage2 644-645
    sibsex3 653-653  sibage3 654-655
    sibsex4 663-663  sibage4 664-665
    sibsex5 673-673  sibage5 674-675
    sibsex6 683-683  sibage6 684-685
    sibsex7 693-693  sibage7 694-695
using /*read your data here*/;
#delimit cr

* Variable labels for the raw variables

* identifiers
label variable region   "id for the city/village"
label variable district "id for the enumeration district"
label variable flat     "id for the given dwelling unit"
label variable hhold    "id for the household"

* respondent
label variable sex       "sex"
label variable birthyear "last two digits of the birth year"
label variable element   "number of elementary classes finished"
label variable typhisc   "type of secondary school finished first"
label variable college   "level of higher educational institution finished first"

* father
label variable dadeduc  "highest education of father"
label variable dadocc14 "occupation of father when R was 14-18 yrs old"
label variable dadtyp14 "class of father when R was 14-18 yrs old"
label variable dadskl14 "skill level of father when R was 14-18 yrs old"

* mother
label variable momeduc  "highest education of mother"
label variable momocc14 "occupation of mother when R was 14-18 yrs old"
label variable momtyp14 "class of mother when R was 14-18 yrs old"
label variable momskl14 "skill level of mother when R was 14-18 yrs old"

* occupation spells: variable names carry a zero-padded index (01..12),
* while the label text shows the plain number (1..12)
forvalues n = 1/12 {
    local nn = string(`n', "%02.0f")
    label variable occ`nn'  "occupation number `n'"
    label variable skl`nn'  "skill level of occupation number `n'"
    label variable clas`nn' "class of worker for occupation number `n'"
}

* siblings
forvalues n = 1/7 {
    label variable sibsex`n' "brother or sister"
    label variable sibage`n' "birth year of brother/sister"
}


* Value labels: each label set is defined once and then attached to every
* variable that shares the coding.

label define sex 1 "male" 2 "female"
label values sex sibsex* sex

label define typhisc ///
    0 "no education above primary level" ///
    1 "academic secondary school" ///
    2 "vocational secondary school" ///
    3 "technical secondary school" ///
    4 "other, granting degree or diploma" ///
    5 "diploma of completion (SZET)" ///
    9 "unknown"
label values typhisc typhisc

label define college ///
    1 "university (egyetem)" ///
    2 "college, academy (foiskola)" ///
    3 "other higher education institute (felsofoku tanintezet)" ///
    9 "unknown"
label values college college

* one label set shared by father's and mother's education
label define dadeduc ///
    0 "not attend school" ///
    1 "elementary school, less than 8 classes completed" ///
    2 "completed 8 classes elementary school" ///
    3 "elementary school and apparentice training" ///
    4 "incomplete secondary school" ///
    5 "completed secondary school diploma" ///
    6 "completed secondary school diploma, apparentice training" ///
    7 "completed higher education institute" ///
    8 "university or college level diploma" ///
    9 "unknown"
label values dadeduc momeduc dadeduc

* class of worker: parents plus every occupation spell (clas01-clas12)
label define class ///
    1 "employee" ///
    2 "coorperative member" ///
    3 "self-employed" ///
    4 "helping family member" ///
    5 "pension" ///
    6 "maternity leave"
label values dadtyp14 momtyp14 clas* class

* skill level: parents plus every occupation spell (skl01-skl12)
label define skill ///
    1 "not manual" ///
    2 "skilled manual" ///
    3 "semi-skilled manual" ///
    4 "unskilled manual"
label values dadskl14 momskl14 skl* skill

* prefix the numeric code to the text of every value label defined so far
numlabel, add


** 02. Constructing year and country variables **

* Both variables are constants for this single-survey file.
generate year = 1983
label variable year "survey year"

* Hungary = 348 (see "ISO Country Codes.pdf")
generate country = 348
label variable country "ISO country code"



** 03. ID variables **

* Household id: the four location fields concatenated digit-wise.
* Field widths come from the fixed-column layout used when reading the data:
*   region 5 digits, district 3, flat 3, hhold 2  ->  up to 13 digits.
* - Stored as double: the default float type holds only about 7 significant
*   digits, so a 12-13 digit id would be rounded and distinct households
*   could end up with the same value.
* - Each multiplier reserves the full width of the fields to its right
*   (hhold 2 digits, flat 3, district 3), so a two-digit hhold cannot spill
*   into the flat digits and flat cannot spill into the district digits.
generate double hid = region*100000000 + district*100000 + flat*100 + hhold
format hid %15.0f
label variable hid "household id"

* Person id: running record number in file order (long keeps it an exact
* integer for any number of records).
generate long pid = _n
label variable pid "person id"


** 04. Basic Demographics (Sex and Age/birth year) **

* Expand the two-digit birth year to four digits:
*   00-69 -> 1900-1969, 86-99 -> 1886-1899;
*   any other value (70-85 or missing) is left missing.
generate birthyr = cond(inrange(birthyear, 0, 69),  birthyear + 1900, ///
                   cond(inrange(birthyear, 86, 99), birthyear + 1800, .))
label variable birthyr "year of birth"

* Age in the survey year.
generate age = year - birthyr
label variable age "age"




** 05. Siblings **

* Sibling sex is coded 0 when missing; turn those into missing values.
recode sibsex* (0=.)

* Count brothers (sex code 1) and sisters (sex code 2) over the seven
* sibling slots available in the raw record.
generate nbro = 0
generate nsis = 0
forvalues i = 1/7 {
    replace nbro = nbro + 1 if sibsex`i' == 1
    replace nsis = nsis + 1 if sibsex`i' == 2
}
generate nsibs = nbro + nsis

* Recode the siblings' two-digit birth years to four digits:
*   a 0 with no sibling sex recorded is an empty slot -> missing;
*   00-83 -> 1900-1983, 84-99 -> 1884-1899.
forvalues i = 1/7 {
    replace sibage`i' = . if sibage`i' == 0 & sibsex`i' == .
    replace sibage`i' = 1900 + sibage`i' if inrange(sibage`i', 0, 83)
    replace sibage`i' = 1800 + sibage`i' if inrange(sibage`i', 84, 99)
}

* Birth order = 1 + number of siblings born in an earlier year.
* Stata treats missing as larger than any number, so without the explicit
* guard "sibage < birthyr" would be true for every recorded sibling whenever
* the respondent's own birth year is missing. Birth order is therefore left
* missing when birthyr is missing.
generate birthorder = 1 if !missing(birthyr)
forvalues i = 1/7 {
    replace birthorder = birthorder + 1 if sibage`i' < birthyr & !missing(birthyr)
}

label variable nbro "number of brothers"
label variable nsis "number of sisters"
label variable nsibs "number of siblings"
label variable birthorder "birth order"


** 06. Own education **

* Secondary schooling: straight copy of typhisc, reusing its value labels.
generate educ_s = typhisc
label values educ_s typhisc
label variable educ_s "type of secondary school completed"

* Tertiary schooling: copy of college with code 0 turned into missing
* (0 has no entry in the college value-label set).
generate educ_t = college if college != 0
label values educ_t college
label variable educ_t "type of tertiary education completed"


** 07. Parents' education: Father and/or Mother **

* Copies of the raw parental education codes; both use the dadeduc label set.
generate faeduc = dadeduc
generate maeduc = momeduc
label values faeduc maeduc dadeduc
label variable faeduc "father's education"
label variable maeduc "mother's education"



** 08. Own occupation **

* First occupation: spell 01, with code 0 treated as missing.
generate firstocc   = occ01  if occ01  != 0
generate firstclass = clas01 if clas01 != 0
generate firstskill = skl01  if skl01  != 0
label variable firstocc   "first occupation"
label variable firstclass "class of worker for first occupation"
label variable firstskill "skill level for first occupation"
label values firstclass class
label values firstskill skill

* Current/most recent occupation: walk through spells 01..12 in order and
* let every non-missing spell overwrite the running value, so the highest-
* numbered recorded spell wins. Each of the three items is tracked
* separately. Code 0 in the spell variables is set to missing first.
generate occ   = .
generate class = .
generate skill = .
forvalues n = 1/12 {
    local nn = string(`n', "%02.0f")
    foreach stub in occ clas skl {
        replace `stub'`nn' = . if `stub'`nn' == 0
    }
    replace occ   = occ`nn'  if occ`nn'  != .
    replace class = clas`nn' if clas`nn' != .
    replace skill = skl`nn'  if skl`nn'  != .
}

label variable occ   "current/most recent occupation"
label variable class "class of workers for current/most recent occupation"
label variable skill "skill level for current/most recent occupation"
label values class class
label values skill skill


** 09. Parents' occupation **

* Occupation codes are copied into new variables ...
generate faocc = dadocc14
generate maocc = momocc14
label variable faocc "father's occupation when R was 14-18 yrs"
label variable maocc "mother's occupation when R was 14-18 yrs"

* ... while class and skill level are renamed in place, which keeps the
* variable and value labels already attached to them.
rename dadtyp14 faclass
rename dadskl14 faskill
rename momtyp14 maclass
rename momskl14 maskill

* Code 0 becomes missing in all six parental occupation variables.
foreach v of varlist faocc maocc faclass maclass faskill maskill {
    replace `v' = . if `v' == 0
}


** 10. Tabulate the Identified Variables **

* Descriptives of every constructed variable go to a plain-text log.
* Placeholder: put the log file path after -log using-.
log using /*insert your work directory here*/, replace text

** Data reading and variable selection from raw data
** Hungarian Social Mobility and Life History Survey 1983

** Sex **
tabulate sex

** Age, Birth Year **
summarize age birthyr, detail

** Siblings **
summarize nsibs nbro nsis birthorder, detail

** R's Own Education **
tab1 educ_s educ_t

** Parental Education **
tab1 faeduc maeduc

** R's Own Occupation **
tab1 firstocc firstclass firstskill
tab1 occ class skill

** Parental Occupation **
tab1 faocc faclass faskill maocc maclass maskill

log close


** 11. Keep the identified variables only

* Everything not listed here (raw columns, spell-level variables, sibling
* detail) is dropped from memory.
keep year country hid pid ///
     sex age birthyr ///
     nbro nsis nsibs birthorder ///
     educ_s educ_t ///
     faeduc maeduc ///
     firstocc firstclass firstskill ///
     occ class skill ///
     faocc maocc ///
     faclass maclass ///
     faskill maskill



** 12. Save the Data File **

* Placeholder: put the output file path after -saveold-.
saveold /*insert your work directory here*/, replace



** 13. Homogenising education **
** Own Education **

* Highest education: start from the secondary-school type and let a recorded
* tertiary type override it (educ_t 1 -> 7, 2 -> 8, 3 -> 6).
generate educ_cat = cond(educ_t == 1, 7, ///
                    cond(educ_t == 2, 8, ///
                    cond(educ_t == 3, 6, educ_s)))
label variable educ_cat "respondent's highest education"
label define educ_cat ///
    0 "no education above primary level" ///
    1 "academic secondary school " ///
    2 "vocational secondary school" ///
    3 "technical secondary school" ///
    4 "other, granting degree or diploma" ///
    5 "diploma of completion (SZET)" ///
    6 "other higher education institute" ///
    7 "university (egyetem)" ///
    8 "college, academy (foiskola)"
label values educ_cat educ_cat

* ISCED code per category; categories outside 0-8 stay missing.
* NOTE(review): categories 4 and 5 come from the secondary-school type
* variable but receive the same code (665) as categories 6 and 8, while
* educ_yrs below assigns them 12 years - confirm this is intended.
generate educ_ISCED = .
replace educ_ISCED = 100 if educ_cat == 0
replace educ_ISCED = 340 if educ_cat == 1
replace educ_ISCED = 350 if inlist(educ_cat, 2, 3)
replace educ_ISCED = 665 if inlist(educ_cat, 4, 5, 6, 8)
replace educ_ISCED = 766 if educ_cat == 7
label variable educ_ISCED "respondent highest education in ISCED code"

* Years of schooling per category; categories outside 0-8 stay missing.
generate educ_yrs = .
replace educ_yrs = 4  if educ_cat == 0
replace educ_yrs = 10 if inlist(educ_cat, 2, 3)
replace educ_yrs = 12 if inlist(educ_cat, 1, 4, 5)
replace educ_yrs = 15 if educ_cat == 6
replace educ_yrs = 16 if educ_cat == 8
replace educ_yrs = 17 if educ_cat == 7
label variable educ_yrs "respondent highest education in years"

** Parents Education **

* NOTE(review): the meaning of this flag is not documented in this file.
generate faeduc_flag = 1

rename faeduc faeduc_cat
rename maeduc maeduc_cat

* Parental education categories (same coding for father and mother):
*   0 not attend school              5 completed secondary school diploma
*   1 elementary, < 8 classes        6 secondary diploma + apprentice training
*   2 completed 8 classes elementary 7 completed higher education institute
*   3 elementary + apprentice train. 8 university or college level diploma
*   4 incomplete secondary school    9 unknown
*
* The ISCED mapping previously applied here was the respondent's mapping,
* whose categories (0 = no education above primary, 1 = academic secondary,
* ..., 7 = university, 8 = college) do not match the parental categories:
* e.g. "elementary, less than 8 classes" was coded 340 and categories 7/8
* were coded in the opposite order to the years mapping (15 vs 17 years).
* The mapping below follows the parental category texts and is kept
* consistent with the years mapping.
* NOTE(review): codes 10 (ISCED 010, never attended) and 244 (lower
* secondary completed) do not occur elsewhere in this file - confirm them
* against the project's ISCED coding scheme. Category 3 is coded as
* vocational upper secondary (350) although it is assigned 8 years -
* confirm how apprentice training should be treated.
foreach p in fa ma {

    * years of schooling (values unchanged); unknown (9) stays missing
    generate `p'educ_yrs = .
    replace `p'educ_yrs = 0  if `p'educ_cat == 0
    replace `p'educ_yrs = 6  if `p'educ_cat == 1
    replace `p'educ_yrs = 8  if inlist(`p'educ_cat, 2, 3, 4)
    replace `p'educ_yrs = 12 if inlist(`p'educ_cat, 5, 6)
    replace `p'educ_yrs = 15 if `p'educ_cat == 7
    replace `p'educ_yrs = 17 if `p'educ_cat == 8

    * ISCED code; unknown (9) stays missing
    generate `p'educ_ISCED = .
    replace `p'educ_ISCED = 10  if `p'educ_cat == 0
    replace `p'educ_ISCED = 100 if `p'educ_cat == 1
    replace `p'educ_ISCED = 244 if inlist(`p'educ_cat, 2, 4)
    replace `p'educ_ISCED = 350 if inlist(`p'educ_cat, 3, 6)
    replace `p'educ_ISCED = 340 if `p'educ_cat == 5
    replace `p'educ_ISCED = 665 if `p'educ_cat == 7
    replace `p'educ_ISCED = 766 if `p'educ_cat == 8
}

label variable faeduc_yrs   "father's education in years"
label variable faeduc_ISCED "father highest education in ISCED code"
label variable maeduc_yrs   "mother's education in years"
label variable maeduc_ISCED "mother highest education in ISCED code"


** 14. Homogenising sibling **

* Cutoff flags: constant 7 for brothers, sisters and total siblings.
foreach v in nbro nsis nsibs {
    generate `v'_flag = 7
}
label variable nbro_flag  "cutoff of number of brothers"
label variable nsis_flag  "cutoff of number of sisters"
label variable nsibs_flag "cutoff of total number of siblings"


** 15. Tab Education and Sibling Variables **

* One-way frequency tables of the demographic, education and sibling
* variables, in the same order as before.
tab1 sex age birthyr ///
     educ_cat educ_yrs faeduc_cat faeduc_yrs maeduc_cat maeduc_yrs faeduc_flag ///
     nbro nsis nsibs nbro_flag nsis_flag nsibs_flag


** 16. Save the Data File **

* Placeholder: put the output file path after -saveold-.
saveold /*insert your work directory here*/, replace



